** This Do-File replicates Figure 2 from P. Dutronc, A. Tondini "Large Means-Tested Pensions with Informal Labor Markets" ** 

* Disable output pagination so the do-file runs without pausing.
set more off

* Working directory: fill in, between the quotes, the folder where the
* PALMS v3.2 extract (years 2002--2015 only) is stored.
cd ""

* Load the PALMS extract, replacing whatever data are currently in memory.
use "south_africa_palms.dta", clear

* ---- Estimation sample ----

* Age window of 5 years on each side of the age-60 threshold: ages 55-59 below,
* ages 60-64 at or above (the threshold age itself is included). See the text
* for a discussion of this threshold. Observations with missing age are dropped
* as well, because Stata treats missing as larger than any number.
drop if age<55 | age>=65

* Keep Black and Coloured individuals only (pop_group codes 1 and 2); see the
* text for the explanation.
keep if inlist(pop_group, 1, 2)

* Exclude observations whose gender is missing (system missing or code 9).
drop if sex==9 | sex==.

* Marital-status dummy: 1 when marital_status equals 1, 0 otherwise
* (a missing marital_status is therefore coded 0).
gen married = marital_status==1


** Rescale weights for estimation on pooled years **

* Step 1: average weight within each survey year (computed on the sample
* currently in memory). Note that bysort leaves the data sorted by year.
bysort year: egen mean_weight = mean(weight)

* Step 2: divide every weight by its year average, so that the rescaled
* weights have the same mean (one) in every wave.
replace weight = weight / mean_weight

** Informal employment indicator (tot_inf) **

* Employees (status 1): informal (1) without a written contract, formal (0)
* with one. The 0/1 flag is copied directly; any other value of the flag
* (including missing) leaves tot_inf missing.
gen tot_inf = no_written_contract if status==1 & inlist(no_written_contract, 0, 1)

* Self-employed (status 2): informal (1) when the business is not registered,
* formal (0) when it is. Same treatment of values other than 0/1.
replace tot_inf = not_registered if status==2 & inlist(not_registered, 0, 1)

* Not employed (status 3 or 4): coded as not informally employed.
replace tot_inf = 0 if inlist(status, 3, 4)

* Drop observations whose informality status could not be determined, so that
* the sample is comparable with Tables 1 and 2.
drop if tot_inf==.


*** Regressions ***

* Treatment indicator: 1 at or above the age-60 threshold, 0 below it.
gen OAP_1 = age>=60

* Running variable: age re-centred so that the discontinuity is at 0.
gen x = age - 60

* Functional form of the running variable: linear.
local f_form x

* Control variables, all entered as factor variables.
local controls i.Province i.married i.education i.pop_group

* Empty variables that will receive the RD point estimate (coef) and the
* upper (ub) and lower (lb) confidence bounds.
foreach v in coef ub lb {
    gen `v' = .
}


* Equation 1 from the paper, estimated every two years on the sex==1 subsample.
* Each pass pools the starting year with the following one (2002-03, 2004-05,
* ..., 2014-15) and stores the jump at the threshold, together with bounds of
* +/- 1.96 robust standard errors, in the rows of the starting year only.
tempname rd_b rd_se

forvalues first = 2002(2)2014 {

    local second = `first' + 1

    * [w=...] in the original is read by regress as analytic weights, which is
    * spelled out here.
    quietly regress emp i.year `controls' c.(`f_form') c.(`f_form')#1.OAP_1 1.OAP_1 ///
        [aweight=weight] if sex==1 & inlist(year, `first', `second'), vce(robust)

    * Hold the estimate and its standard error in double-precision scalars.
    scalar `rd_b'  = _b[1.OAP_1]
    scalar `rd_se' = _se[1.OAP_1]

    replace coef = `rd_b' if year==`first'
    replace ub   = `rd_b' + 1.96*`rd_se' if year==`first'
    replace lb   = `rd_b' - 1.96*`rd_se' if year==`first'
}
* Note: Each RD coefficient is estimated on two consecutive years pooled together to obtain a larger sample *



************
**Figure 2**
************

* Work on a temporary copy: the data in memory are put back by restore below.
preserve

* Reduce to one row per year. coef, ub and lb are constant within a year, so
* the mean returns the stored value; years in which they were never filled
* stay missing.
collapse (mean) coef ub lb, by(year)

* Point estimates (markers joined by a dashed line) with capped spikes running
* from lb to ub, and a dashed vertical reference line at 2009.
twoway (scatter coef year, mcolor(black) msize(small)) ///
       (line coef year, lcolor(black) lpattern(dash)) ///
       (rcap ub lb year, lcolor(black)), ///
       graphregion(color(white)) plotregion(fcolor(white)) ///
       legend(off) xlabel(2002(1)2014) ///
       xline(2009, lcolor(black) lpattern(dash))

restore
